from elasticsearch import Elasticsearch
from pymongo import MongoClient
import re

# MongoDB client. Source data lives in the `cooks2` collection of the
# `cookbook` database (about 900 items, per the original note).
client = MongoClient('mongodb://localhost:27017/')
db = client['cookbook']
collection = db['cooks2']

# Elasticsearch client. The commented-out line below points at a local node;
# the active line points at a remote host.
# es = Elasticsearch(["http://localhost:9200/"])
es = Elasticsearch(["http://119.29.116.62:9200/"])


def get_suggests(title, tags):
    """Collect the distinct ``ik_smart`` tokens of a title and its tags.

    Args:
        title: Title text. ``None`` or an empty string is skipped.
        tags: Iterable of tag strings. ``None`` is treated as "no tags" and
            empty entries are skipped.

    Returns:
        list: The unique token strings, in arbitrary order (they are
        gathered in a set). Empty when there is nothing to analyze.
    """
    # Drop missing/empty texts so they are never sent to the analyzer.
    texts = [text for text in [title, *(tags or [])] if text]

    suggests = set()
    if texts:
        # The analyze API accepts an array of strings for "text", so the
        # title and every tag are tokenized in a single request instead of
        # one request per string.
        analysis = es.indices.analyze(body={
            "analyzer": "ik_smart",
            "text": texts
        })
        suggests.update(token.get('token') for token in analysis['tokens'])

    # Echo the collected tokens to stdout (retained from the original).
    print(suggests)
    return list(suggests)


def get_step_timers(steps):
    """Extract a timer entry for every quantity token found in the steps.

    Each step is tokenized with the ``ik_smart`` analyzer; every token whose
    type is ``TYPE_CQUAN`` produces one entry.

    Args:
        steps: Iterable of step description strings. ``None`` is treated as
            an empty list.

    Returns:
        list[dict]: Entries of the form ``{'index': i, 'timer': minutes}``
        where ``i`` is the zero-based position of the step and ``minutes``
        is the value returned by ``is_minute`` for the token.
    """
    step_timers = []
    for _index, step in enumerate(steps or []):
        ts = es.indices.analyze(body={
            "analyzer": "ik_smart",
            "text": step
        })
        # The token has already been isolated by the analyzer, so convert it
        # directly instead of sending it through a second analyze request.
        # NOTE(review): tokens that is_minute() does not recognise as a
        # duration still produce an entry with timer 0 -- confirm whether
        # consumers of the index want those filtered out.
        step_timers.extend(
            {'index': _index, 'timer': is_minute(t['token'])}
            for t in ts['tokens']
            if t['type'] == 'TYPE_CQUAN'
        )
    return step_timers


def get_number_from_time(time):
    """Return only the digit characters of *time*, concatenated.

    Args:
        time: Duration text such as ``"20分钟"``.

    Returns:
        str: The digits in their original order, or ``""`` if there are none.
    """
    # Raw string: "\D" in a plain literal is an invalid escape sequence.
    return re.sub(r"\D", "", time)


def is_minute(time):
    """Convert a duration expression to a whole number of minutes.

    The text is split into its digits and the remaining "unit" text; the
    unit decides how the digits are interpreted.

    Args:
        time: Duration text such as ``"2小时"``, ``"20分钟"``, ``"5分"`` or
            ``"30秒"``. ``None`` and ``""`` are accepted.

    Returns:
        int: Hours times 60, minutes as-is, ``1`` for any value in seconds,
        and ``0`` when the input is empty, the unit is not recognised, or an
        hour/minute unit carries no digits. Mixed expressions such as
        ``"1小时30分钟"`` are not recognised and yield ``0``.
    """
    if not time:
        return 0

    suffix = re.sub(r"\d", "", time)
    if suffix == "秒":
        # Seconds are rounded up to a single minute.
        return 1
    if suffix not in ("小时", "分钟", "分"):
        return 0

    number = get_number_from_time(time)
    if not number:
        # A bare unit without digits would make int("") raise ValueError.
        return 0

    minutes = int(number)
    return minutes * 60 if suffix == "小时" else minutes


def get_accuracy_time(desc):
    """Extract a duration in minutes from free-form text.

    The text is tokenized with the ``ik_smart`` analyzer and the last token
    of type ``TYPE_CQUAN`` is converted with ``is_minute``.

    Args:
        desc: Description text. ``None`` or an empty string yields ``0``
            without contacting Elasticsearch.

    Returns:
        int: The value ``is_minute`` returns for the selected token, or for
        an empty string when the text contains no ``TYPE_CQUAN`` token.
    """
    if not desc:
        # Nothing to analyze; same result as a token-less analysis.
        return 0

    ts = es.indices.analyze(body={
        "analyzer": "ik_smart",
        "text": desc
    })
    res = ''
    for t in ts['tokens']:
        if t['type'] == 'TYPE_CQUAN':
            # Later matches overwrite earlier ones: the last one wins.
            res = t['token']
    return is_minute(res)


def item_to_doc(item):
    """Build the search document for one Mongo item and index it.

    Reads ``date_info``, ``user_info``, ``pic_url``, ``steps``, ``tags``,
    ``step_urls``, ``title``, ``ready_time``, ``cost_time``, ``materials``
    and ``material_pics`` from *item*, derives the suggest tokens, step
    timers and durations through the analyzer helpers, and indexes the
    result into the ``cookbooks`` index.

    Args:
        item: Mapping-like Mongo document providing ``.get``.

    Returns:
        None. The document is sent to Elasticsearch as a side effect.
    """
    # Only the part of date_info before the first "/" is kept.
    date = item.get('date_info').split('/')[0].strip()

    # user_info is "/"-separated; from each segment the text that follows
    # the full-width colon is kept, as a stripped string.
    segments = item.get('user_info').split('/')

    def value_at(position):
        return segments[position].split('：')[1].strip()

    order = value_at(0)
    watch = value_at(1)
    fans = value_at(2)

    title = item.get('title')
    tags = item.get('tags')
    steps = item.get('steps')

    # Analyzer-backed fields, computed in the same order as before.
    suggest = get_suggests(title=title, tags=tags)
    step_timers = get_step_timers(steps)
    ready_time = get_accuracy_time(item.get('ready_time'))
    cost_time = get_accuracy_time(item.get('cost_time'))

    # Pair every material text with its picture, position by position.
    materials = [
        {"text": text, "pic": pic}
        for text, pic in zip(item.get('materials'), item.get('material_pics'))
    ]

    doc = {
        'date': date,
        'fans': fans,
        'order': order,
        'pic_url': item.get('pic_url'),
        'step_urls': item.get('step_urls'),
        'steps': steps,
        'step_timers': step_timers,
        'suggest': suggest,
        'tags': tags,
        'title': title,
        'watch': watch,
        'ready_time': ready_time,
        'cost_time': cost_time,
        'materials': materials
    }
    es.index(index="cookbooks", doc_type="cookbook", body=doc)


def mongo_item_pipeline():
    """Feed every document of the Mongo collection to ``item_to_doc``."""
    cursor = collection.find({})
    for record in cursor:
        item_to_doc(record)


# Run the full Mongo -> Elasticsearch import only when executed as a script.
if __name__ == '__main__':
    mongo_item_pipeline()

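# Debug snippet: fetch a single indexed document by id.
# print(es.get(index="cookbooks", doc_type="cookbook", id="7czox2IB8rNDQhtMbwGX"))

# Hand-written sample document, kept for reference only. It does not match
# what item_to_doc() emits: there `timer` is an integer number of minutes and
# `fans` / `order` / `watch` are strings.
# es.index(index="cookbooks", doc_type="cookbook", body={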
#     "date": "2018-02-05",
#     "fans": 54506,
#     "order": 4201,
#     "pic_url": "http://s1.st.meishij.net/r/127/79/6144877/s6144877_151766522396327.jpg",
#     "step_urls": [
#         "http://s1.st.meishij.net/rs/127/79/6144877/n6144877_151766603729926.jpg",
#         "http://s1.st.meishij.net/rs/127/79/6144877/n6144877_151766604651913.jpg",
#         "http://s1.st.meishij.net/rs/127/79/6144877/n6144877_151766605874439.jpg",
#         "http://s1.st.meishij.net/rs/127/79/6144877/n6144877_151766606711296.jpg",
#         "http://s1.st.meishij.net/rs/127/79/6144877/n6144877_151766608189653.jpg",
#         "http://s1.st.meishij.net/rs/127/79/6144877/n6144877_151766576313517.jpg",
#         "http://s1.st.meishij.net/rs/127/79/6144877/n6144877_151766578715455.jpg",
#         "http://s1.st.meishij.net/rs/127/79/6144877/n6144877_151766589215054.jpg"
#     ],
#     "steps": [
#         "肉馅加入花椒水，料酒，酱油，生抽，姜末，加入鸡蛋，淀粉，面粉，味精搅打上劲",
#         "制成四个大丸子",
#         "锅中多放点食用油，油大约7成热时下入丸子",
#         "炸制金黄色捞出控干油备用",
#         "锅底油，加入白糖冒小泡发黄时，加入大料，花椒加入葱姜蒜煸香",
#         "加入水，酱油，调口，大火烧开，中小火焖20分钟，汤汁浓稠时就可以了",
#         "油菜清洗干净，大的一切两开，放水开水锅中焯水",
#         "小油菜围边，丸子码放盘中，汤汁浇在丸子上"
#     ],
#     "step_timers": [
#         {
#             "index": 1,
#             "timer": "20分钟"
#         },
#         {
#             "index": 3,
#             "timer": "20秒"
#         }
#     ],
#     "suggest": [
#         "四喜丸子"
#     ],
#     "tags": ["丸子"],
#     "title": "四喜丸子",
#     "watch": 22
# })
